; Texturing with bilinear filtering, real Phong shading
; and a glass-like effect (slices buffer for the object),
; all in parallel. Thanks to the authors of the 3dica tutorial.
; Moreover, I read many tutorials and articles released
; on the internet.
; Implemented in FASM by Maciej Guba.
; http://macgub.co.pl

; NOTE(review): ROUND2 is not referenced by the code visible in this part of
; the file; presumably it is consumed by other code that shares this source -
; confirm before changing or removing it.
ROUND2 equ 10
; include "labs.inc"  ; dbg / profiling purpose
; weighted, sorted transparency
; first stage - look at glass_tex_tri proc

glass_tex_line_sl:
; Draws one horizontal span of a textured, lit, "glass" (slice-weighted)
; triangle: for every pixel of the span the interpolated normal is
; normalised, all lights in lights_aligned..lights_aligned_end are
; evaluated, the texture is sampled through bi_filter, the result is
; weighted by an entry of slices_factor1 and added (with unsigned byte
; saturation) to the pixel already present in the screen buffer.
;
; in:
;    xmm0 - normal vector 1
;    xmm1 - normal vect 2
;    xmm3 - lo -> hi tx1, ty1, z1 coords as dwords float
;    xmm5 - lo -> hi tx2, ty2, z2 coords as dwords float
;    xmm2 - lo -> hi y_min, y_max, x_min, x_max
;           as dword integers

;    eax - x1
;    ebx - x2
;    ecx - y
;    edi - screen buffer
;    esi - slices buffer filled with dd floats
;    edx - texture pointer (handle)
;    xmm6 - lo -> hi dword x_res, tex_shift, tex_x * 4,
;          tex size as dword integers
;
; out:
;    nothing is returned in registers; pixels are written through edi.
;    If the clipped span is empty (lx2 <= lx1) the routine returns
;    without drawing anything.
;
; clobbers:
;    eax, ebx, ecx, edx, esi, edi, xmm0-xmm7, flags (plus whatever
;    bi_filter itself clobbers). ebp is saved and restored.
;
; notes:
;    - Only x is clipped here (against x_min / x_max). y_min / y_max are
;      stored but never read in this routine - presumably the caller
;      clips in y; verify against glass_tex_tri.
;    - The screen pixel is treated as 4 bytes, the slices buffer as
;      4 floats (16 bytes) per pixel.


   ; Frame: ebp becomes a 16-byte aligned base roughly 128 bytes below the
   ; entry esp, so locals can be addressed both below and above ebp with
   ; movaps. 256+16 bytes are reserved on the stack to cover the whole area.
   push  ebp
   mov   ebp,esp
   sub   ebp,128
   sub   esp,256+16
   and   ebp,0xfffffff0

   ; current (interpolated) normal
   .n1        equ [ebp-16]
   ; copy of xmm6: x_res, tex_shift, tex_x4, tex_size (one 16-byte vector)
   .x_res     equ [ebp-32]
   .tex_shift equ [ebp-28]
   .tex_x4    equ [ebp-24]
   .tex_size  equ [ebp-20]
;  .n2        equ [ebp-32]
   .yd        equ [ebp-36]
   .xd        equ [ebp-40]
   ; fractional texel coordinates (xf, yf) handed to bi_filter
   .yf        equ [ebp-44]
   .xf        equ [ebp-48]

   ; per-pixel step of (tx, ty, z); stored as one vector starting at .dtx
   .dz        equ [ebp-56]
   .dty       equ [ebp-60]
   .dtx       equ [ebp-64]

   ; copy of xmm2: y_min, y_max, x_min, x_max (one vector starting at .y_min)
   .x_max     equ [ebp-68]
   .x_min     equ [ebp-72]
   .y_max     equ [ebp-76]
   .y_min     equ [ebp-80]

   ; per-pixel step of the normal
   .dn     equ [ebp-96]

   ; address of the slice factor table (slices_factor1)
   .s_fact equ dword[ebp-100]
   .y      equ [ebp-104]
   ; clipped span start / end
   .lx1    equ [ebp-108]
   .lx2    equ [ebp-112]
   ; float (tex_x4/4 - 1) in all four lanes, upper clamp for texture coords
   .tex_m2 equ [ebp]
   ; normalised current normal
   .cnv    equ [ebp+16]

   ; current (tx, ty, z); stored as one vector starting at .tx1
   .z1     equ [ebp+40]
   .ty1    equ [ebp+36]
   .tx1    equ [ebp+32]

   ; 0.13 in all four lanes: half width of the z window used for slices
   .aprox  equ [ebp+48]
;  .z2     equ [ebp+56]
;  .ty2    equ [ebp+52]
;  .tx2    equ [ebp+48]
;  .cz     equ [ebp+72]
;  .cty    equ [ebp+58]
;  .ctx    equ [ebp+64]
   ; 255.0*255.0 in all four lanes
   .word_max_f equ [ebp+64]
   .tx_ptr     equ [ebp+80]
   ; movmskps result of the slice test for the current pixel
   .mask_trp   equ dword[ebp+84]
   .zbuff      equ [ebp+88]
   .screen     equ [ebp+92]
   ; 255.0 in all four lanes
   .mask_255f  equ [ebp+96]


        ; ---- save parameters and build constants ----
        movaps    .x_res,xmm6
        mov       .y,ecx
        movaps    .y_min,xmm2
        pcmpeqd   xmm6,xmm6       ; all ones
        mov       ecx,.tex_x4
        psrld     xmm6,24         ; 255 in every dword
        shr       ecx,2           ; tex_x4 / 4
        cvtdq2ps  xmm6,xmm6       ; 255.0
        dec       ecx             ; tex_x4 / 4 - 1
        movaps    .mask_255f,xmm6
        mulps     xmm6,xmm6       ; 255.0 * 255.0


        movaps    .word_max_f,xmm6
        cvtsi2ss  xmm6,ecx
        mov       .s_fact,slices_factor1
        mov       ecx,0.13        ; float immediate: slice z tolerance
        shufps    xmm6,xmm6,0
        movd      xmm7,ecx
        shufps    xmm7,xmm7,0

        mov       .lx1,eax
        mov       .lx2,ebx
        movaps    .tex_m2,xmm6
        movaps    .aprox,xmm7
        mov       .tx_ptr,edx

        ; ---- per-pixel deltas: (end - start) * approx 1/(x2 - x1) ----
        sub       ebx,eax
        cvtsi2ss  xmm7,ebx
        rcpss     xmm7,xmm7       ; approximate reciprocal of the span length
        shufps    xmm7,xmm7,0
        subps     xmm1,xmm0
        mulps     xmm1,xmm7
        movaps    .dn,xmm1
        subps     xmm5,xmm3
        mulps     xmm5,xmm7
        movaps    .dtx,xmm5

        ; ---- clip the left end: advance start values by (x_min - x1) steps ----
        mov       ebx,.lx1
        cmp       ebx,.x_min  ; clipping on function4
        jge       @f
        mov       eax,.x_min
        sub       eax,ebx
        cvtsi2ss  xmm7,eax
        shufps    xmm7,xmm7,0
        mulps     xmm5,xmm7
        mulps     xmm1,xmm7
        addps     xmm3,xmm5   ; .tx1
        addps     xmm0,xmm1   ; .n1
        mov       eax,.x_min
        mov       dword .lx1,eax
      @@:
        movaps    .tx1,xmm3
        mov       ecx,.lx2
        movaps    .n1,xmm0

        ; ---- clip the right end: lx2 = min(lx2, x_max) (signed) ----
        mov       eax,.x_max
        cmp       ecx,eax
        cmovnl    ecx,eax
        mov       .lx2,ecx

        ; ---- destination pointers for pixel (lx1, y) ----
        mov       eax,.x_res
        mul       dword .y        ; clobbers edx (texture pointer already saved)
        add       eax,.lx1
        shl       eax,2
        add       edi,eax         ; 4 bytes per screen pixel
        shl       eax,2
        add       esi,eax      ; every pixel 4 32bit float values

        ; ---- pixel count; bail out on an empty / reversed / clipped-out span.
        ; Without this check ecx <= 0 would make the dec/jnz loop below wrap
        ; around and run far outside both buffers.
        mov       ecx,.lx2
        sub       ecx,.lx1
        jle       .end_line
        movaps    xmm2,.tx1
   .ddraw:
        ; loop invariant: ecx = pixels left (> 0), xmm2 = current tx, ty, z
        pcmpeqd   xmm1,xmm1       ; all ones, used to invert the slice mask
        push      ecx
        xorps     xmm5,xmm5       ; accumulated light colour (max over lights)

        ; normalise the current normal: n * rsqrt(dot(n, n)).
        ; The dot product uses all four lanes - assumes the 4th lane of the
        ; normal is zero; TODO confirm against the caller.
        movaps    xmm7,.n1
        dpps      xmm7,xmm7,0xff
        rsqrtps   xmm7,xmm7
        mulps     xmm7,.n1
        movaps    .cnv,xmm7

        ; split clamped texture coords into integer (xmm7) and fraction (.xf/.yf)
        movaps    xmm6,xmm2
        minps     xmm6,.tex_m2 ; clamp to float (tex_x4/4 - 1)
        cvttps2dq xmm7,xmm6
        cvtdq2ps  xmm4,xmm7
        subps     xmm6,xmm4
        movlps    .xf,xmm6
        ; slices
        ; flag the slice values at [esi] that lie inside the window
        ; (z - 0.13, z + 0.13]: lanes where only the upper bound compares
        ; "not less than" the stored value.
        shufps    xmm2,xmm2,10101010b  ; broadcast z
        movaps    xmm3,xmm2

        addps     xmm3,.aprox
        subps     xmm2,.aprox
        cmpnltps  xmm2,[esi]
        cmpnltps  xmm3,[esi]
        xorps     xmm2,xmm3
        movmskps  ebx,xmm2        ; one bit per slice lane
        shufps    xmm2,xmm2,0     ; broadcast the lane 0 (first slice) result
        mov       .mask_trp,ebx
        mov       eax,lights_aligned   ; global label
      .again_col:
        ; One 64-byte light record per iteration. From the usage below:
        ; [eax] is dotted with the normal, [eax+16] scales that dot product,
        ; [eax+32] is added unconditionally and [eax+48] scales the dot
        ; product raised to the 64th power (presumably direction, diffuse,
        ; ambient and specular colours - confirm against the light setup).

        movaps    xmm0,[eax] ; calc multple lights
        dpps      xmm0,.cnv,01110111b  ; 3-component dot into lanes 0..2
        movaps    xmm4,xmm0 ; reflective part of light equation
        mov       ecx,6
      .mml:
        mulps     xmm4,xmm4 ; only for first front slice.
        loop      .mml            ; six squarings -> dot^64
        mulps     xmm4,[eax+48]
        andps     xmm4,xmm2
    ; .no_reflective:

        ; NOTE(review): xmm2 is inverted here and is not restored before the
        ; next light, so the masks applied to the reflective and to the other
        ; term swap between successive lights - confirm this is intended.
        xorps     xmm2,xmm1
        mulps     xmm0,[eax+16]
        andps     xmm0,xmm2
        addps     xmm4,xmm0

        addps     xmm4,[eax+32]
        maxps     xmm5,xmm4       ; keep the per-channel maximum over lights
        add       eax,64
        cmp       eax,lights_aligned_end
        jnz       .again_col
        minps     xmm5,.mask_255f ; clamp light colour to 255.0
        ; texture coords work
        ; move the integer tx, ty from xmm7 to eax, ebx through the stack
        sub       esp,8
        mov       ecx,.tex_shift
        movlps    [esp],xmm7
        pop       eax ebx
        shl       ebx,cl          ; TEX_SHIFT
        add       eax,ebx         ; texel index = tx + (ty << tex_shift)
        and       eax,.tex_size   ; tex_size is used as a wrap mask
        shl       eax,2           ; 4 bytes per texel
        add       eax,.tx_ptr
        mov       ebx,eax
        add       ebx,.tex_x4     ; same column, next texture row

        movlps    xmm7,[eax]      ; 8 bytes (two texels) from the current row
        movlps    xmm6,[ebx]      ; 8 bytes (two texels) from the next row
        movlps    xmm1,.xf        ; fractional xf, yf

        call      bi_filter       ; proc in '2bi_fil.inc' file
        ; the filtered texel is read from xmm7
        mulps     xmm5,xmm7

        ; weight by the slice factor selected by the lowest set bit of the
        ; slice mask (16-byte table entries).
        ; NOTE(review): if .mask_trp is zero the bsf destination is
        ; architecturally undefined - confirm the intended behaviour for
        ; pixels that match no slice.
        mov       ebx,.s_fact
        mov       eax,.mask_trp
        bsf       eax,eax
        shl       eax,4
        add       ebx,eax
        mulps     xmm5,[ebx]
        minps     xmm5,.word_max_f ; clamp to 255.0 * 255.0

        ; convert to bytes and add, with unsigned saturation, to the pixel
        ; already stored at [edi]
        cvtps2dq  xmm5,xmm5
        psrld     xmm5,7
        movd      xmm6,[edi]
        packssdw  xmm5,xmm5
        packuswb  xmm5,xmm5
        paddusb   xmm5,xmm6
        movd      [edi],xmm5

        ; ---- advance to the next pixel ----
        pop       ecx
        add       edi,4
        add       esi,4*4
        movaps    xmm2,.tx1
        movaps    xmm0,.n1        ; cur normal
        addps     xmm0,.dn
        addps     xmm2,.dtx
        movaps    .n1,xmm0
        movaps    .tx1,xmm2
        dec       ecx
        jnz       .ddraw
  .end_line:
        ; common exit: release the local frame and restore ebp
        add       esp,256+16
        pop       ebp

ret
